import htmllib
import formatter
import string
import urllib, urlparse

class htmlLinks(htmllib.HTMLParser):
    """
    Extract links from html-page
    http://mail.python.org/pipermail/python-list/2000-May/034400.html
    """

    def __init__(self, base):
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
        self.anchors = []
        self.base = base

    def anchor_bgn(self, href, name, type):
        self.save_bgn()
        if self.base:
            self.anchor = urlparse.urljoin(self.base, href)
        else:
            self.anchor = href

    def anchor_end(self):
        text = string.strip(self.save_end())
        if self.anchor and text:
            self.anchors.append((self.anchor, text))

if __name__ == '__main__':
    # Demo: download a page and list every link found on it.
    # To parse a local file instead, read it from open('x.html', 'r') and
    # build the parser with htmlLinks('') so hrefs are left unresolved.
    URL = "http://www.pythonware.com"

    f = urllib.urlopen(URL)
    try:
        page = f.read()
    finally:
        # Always release the connection, even if the read fails.
        f.close()

    p = htmlLinks(URL)
    p.feed(page)
    p.close()

    for href, text in p.anchors:
        # Same output as printing the three items comma-separated:
        # url, a ' --  --  -- ' separator, then the link text.
        print("%s %s %s" % (href, ' -- ' * 3, text))
